Oozie Sqoop Action Extension
必须为Hadoop 0.23及以上版本,当前作业完成之后才能执行下个action。
需配置:sqoop
活动的 job-tracker, name-node, command, arg
以及配置。
配置可以指定 job-xml
元素或 行内 configuration
元素,Hadoop mapred.job.tracker
和 fs.default.name
不鞥出现在 行内 configuration
元素中。
语法:
<workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.1">
...
<action name="[NODE-NAME]">
<sqoop xmlns="uri:oozie:sqoop-action:0.2">
<job-tracker>[JOB-TRACKER]</job-tracker>
<name-node>[NAME-NODE]</name-node>
<prepare>
<delete path="[PATH]"/>
...
<mkdir path="[PATH]"/>
...
</prepare>
<configuration>
<property>
<name>[PROPERTY-NAME]</name>
<value>[PROPERTY-VALUE]</value>
</property>
...
</configuration>
<command>[SQOOP-COMMAND]</command>
<arg>[SQOOP-ARGUMENT]</arg>
...
<file>[FILE-PATH]</file>
...
<archive>[FILE-PATH]</archive>
...
</sqoop>
<ok to="[NODE-NAME]"/>
<error to="[NODE-NAME]"/>
</action>
...
</workflow-app>
Sqoop command
可以通过 command
或 arg
元素来指定command,使用 command
元素会分割命令 每个空白 为多个参数,使用 arg
将提交每个参数,并空白符分割。
使用 command 元素例子
<workflow-app name="sample-wf" xmlns="uri:oozie:workflow:0.1">
...
<action name="myfirsthivejob">
<sqoop xmlns="uri:oozie:sqoop-action:0.2">
<job-traker>foo:8021</job-tracker>
<name-node>bar:8020</name-node>
<prepare>
<delete path="${jobOutput}"/>
</prepare>
<configuration>
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
</property>
</configuration>
<command>import --connect jdbc:hsqldb:file:db.hsqldb --table TT --target-dir hdfs://localhost:8020/user/tucu/foo -m 1</command>
</sqoop>
<ok to="myotherjob"/>
<error to="errorcleanup"/>
</action>
...
</workflow-app>
使用 arg 元素例子
<workflow-app name="sample-wf" xmlns="uri:oozie:workflow:0.1">
...
<action name="myfirsthivejob">
<sqoop xmlns="uri:oozie:sqoop-action:0.2">
<job-traker>foo:8021</job-tracker>
<name-node>bar:8020</name-node>
<prepare>
<delete path="${jobOutput}"/>
</prepare>
<configuration>
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
</property>
</configuration>
<arg>import</arg>
<arg>--connect</arg>
<arg>jdbc:hsqldb:file:db.hsqldb</arg>
<arg>--table</arg>
<arg>TT</arg>
<arg>--target-dir</arg>
<arg>hdfs://localhost:8020/user/tucu/foo</arg>
<arg>-m</arg>
<arg>1</arg>
</sqoop>
<ok to="myotherjob"/>
<error to="errorcleanup"/>
</action>
...
</workflow-app>
活动计数器:hadoop:counters()
活动日志:设置日志等级,oozie.sqoop.log.level
属性,默认为 INFO
Sqoop Action Schema Version 0.3
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:sqoop="uri:oozie:sqoop-action:0.3" elementFormDefault="qualified"
targetNamespace="uri:oozie:sqoop-action:0.3"> <xs:element name="sqoop" type="sqoop:ACTION"/>
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="prepare" type="sqoop:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="configuration" type="sqoop:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:choice>
<xs:element name="command" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="arg" type="xs:string" minOccurs="1" maxOccurs="unbounded"/>
</xs:choice>
<xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="sqoop:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="sqoop:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
</xs:schema>
Sqoop Action Schema Version 0.2
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:sqoop="uri:oozie:sqoop-action:0.2" elementFormDefault="qualified"
targetNamespace="uri:oozie:sqoop-action:0.2"> <xs:element name="sqoop" type="sqoop:ACTION"/>
.
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="prepare" type="sqoop:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="configuration" type="sqoop:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:choice>
<xs:element name="command" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="arg" type="xs:string" minOccurs="1" maxOccurs="unbounded"/>
</xs:choice>
<xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="sqoop:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="sqoop:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
</xs:schema>